import os

import os
import time
import pymysql
import requests
from lxml import etree


class WeiboAnt:
    """Scrape the Weibo real-time hot-search list and store it in MySQL.

    Creating an instance opens a MySQL connection (``self.db``) and a cursor
    (``self.cursor``). ``getHot`` downloads the hot-search page and inserts
    one row per entry into the ``basic_wb_hot`` table.
    """

    # Page that lists the hot searches, and the prefix prepended to each
    # relative link found on it.
    HOT_URL = 'https://s.weibo.com/top/summary'
    LINK_PREFIX = 'https://s.weibo.com/'

    # Only the first MAX_ROWS table rows are processed.
    MAX_ROWS = 50

    # Seconds to wait for the HTTP response before giving up, so that a
    # stalled connection cannot block the scraper forever.
    REQUEST_TIMEOUT = 10

    USER_AGENT = (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/88.0.4324.146 Safari/537.36'
    )

    # NOTE(review): this is a captured browser session cookie committed to
    # source control. It is kept verbatim to preserve the request that is
    # sent; confirm it is still required and move it to configuration.
    # The value is assembled from adjacent literals (one per cookie pair)
    # because a quoted string may not span physical lines.
    COOKIE = (
        '_zap=1c849c0c-39c4-4dcc-a05c-2334b3ebd5ab; '
        'd_c0="AGDWx4VKlxKPTq0PJ7no2Km9dxhQjA8drPw=|1612170619"; '
        '_xsrf=kBGPMrIZsloVZdf9HqSkfZwao2mkiVuT; '
        'q_c1=40a02b16f4b2480b832eaa8b79cdbd32|1615212396000|1615212396000; '
        'tst=h; '
        '__snaker__id=SHnFdEXuXPeIQzhv; '
        'gdxidpyhxdE=Yb4fbsac8xjSmhCcyrfAOEpn%2FrxuKfOX7d2IT5l%5CAivYU1wbXXKznKlgXWXp1KxrfbdB%5CKV%2FIJfAzcfariAPG4vr0AuM%2BRisfgUJnM%2BTbZgK7TzJRk77xdEdrWsL0oku4NQp5a7DWBWaS1RQ%5CK9KZOhC793sd47EXmr7l%5CR3AcafdDJl%3A1621522401807; '
        '_9755xjdesxxd_=32; '
        'YD00517437729195%3AWM_NI=bZsp6ySwtddk8%2Fe3FuR9a%2BWySQ%2FGfkNCi%2B77T7glRXGPtCdMLPJeeEgX99gaA1N%2F6AhAouQsSad5fZQT4%2BpqTlyEhCdlBdidfXBH0J5XAJt8EhHfzCdlaVFw1SQJvvjORmE%3D; '
        'YD00517437729195%3AWM_NIKE=9ca17ae2e6ffcda170e2e6ee89dc4ff28788a5f76f89968ba2c85f978b8aaeb661ad89adb9b23981bcfc98d02af0fea7c3b92aa286bd91b57391a9bc85e761fcf1abd4c57f92e7afccd673918fe1a8e5618e9da7ccb63d98e9a98dd1548590bca4e74790908686e84fa588f7dae66b83ef82d0b54bac9fa196d47991bfacd6e552938cfcccf37b98b4b7a6eb49a8b48e85ec70a6888ea9b342f7888fabf73487b88887f949a8b4afb7c57df38d9fabf57eaa919e8ec837e2a3; '
        'YD00517437729195%3AWM_TID=hErpeQ6%2BqtRBREVAFQNrgtWcTymTJ90Z; '
        'captcha_session_v2="2|1:0|10:1621521765|18:captcha_session_v2|88:VUhsQVN5L1YzMzJoQTRTY0ZxVU04czVJeUJvTTkyUU5YV0luRHlTUzg5a0dWWHRSZkV5ZlVMVGh3eGJxT2M0bw==|2237ce8c146e6a2e988f6c6c264432ae865f767032be3ac398904d64d0c4400f"; '
        'captcha_ticket_v2="2|1:0|10:1621521783|17:captcha_ticket_v2|704:eyJ2YWxpZGF0ZSI6IkNOMzFfTU5NS3dJLkhHVDVQekJPYy1uTVFKMmlHcXR1WGZ6RnVtN0g0STJLSmdUbkFPSDl6aS1IMEMudENSaFNUSEYyYnZOWUVBSWJwX0FzSEpmWjltY0RtanUtYi5TNkhkcnViSmZFT3BHX1BiVXhIWHZYRWFMZXhwMF9nanloS3VZVFlRdU9kV05SWDY0UDhuSi1WQldhbGI3SHBYR0hqSGEyMDFObnhtSldsUzhBTmIxNmZvdUl4NkJCOGpXWHgwVkVZbzltcHNQQXVaLkpSdDBOZVVnNllMZnAtNEFiaEhUUk11ZHBkVmUwQUdxaU1LeGs4Zlp4eklKRTZOMDh0OXR1dVZrYUZxeUFEdGF1TFpXcVlmWWFKNjdsZUNYQjRXU1VPUmlwU2NueDVWVGFXWnNaa1phbmNNbkcuYnYyeUVUTnBSeGQ0VDVhUnlGSTZpRlNlRjFiRy5ZbVZPVXBVdHc0ZHhDYy1yZWFoLkdyUlNWTjZUeVFjMFhqLlROaGlIcmllU2lxS3R1NnJaalNOTnBrLi1tT0hpMDdPRm1GX0VoT0ZlSTkxalFMNTdvQW9hLm9kUHlKNkgtZ3otSW1CaHA2U3JUR2pWclpwZjQxaFQxSng2NG95MXNxT2lmWDQ4ZVhhWEN1TDdvQlNReTluc3Z1dzVnN2FHNDBpUGRBMyJ9|1826ba7bc11b1271cace7e231a793be58ac00378617d057d1aa6d808fd40ddc3"; '
        'z_c0="2|1:0|10:1621521793|4:z_c0|92:Mi4xc2xiSURnQUFBQUFBWU5iSGhVcVhFaVlBQUFCZ0FsVk5nY09UWVFEclM3VzlNWXNNZ3ozNEs3OEQycmUyQmNBUk53|6134bc7b208f364d49091013a5c95f1f2d54b5fdaef9b665603edab2d6d13f3a"; '
        'unlock_ticket="AFCkh2u2Ig8mAAAAYAJVTYl8pmBI-_O1CJkESc7fcZ2MLz1Mem7uyw=="; '
        'Hm_lvt_98beee57fd2ef70ccdd5ca52b9740c49=1621522047,1621522144,1621522150,1621522160; '
        'Hm_lpvt_98beee57fd2ef70ccdd5ca52b9740c49=1621522181; '
        'SESSIONID=u95JKOQL3nXmMihR2RJk5fMnVeAr5yql4yLbUl7BOKV; '
        'JOID=V10TC0xlTNdbOiXRNWgOzeq8YkQkEHbsHnNqg3dUIow4eE65ZGE2cDY4Jtw0ZS7Z5tRoSi4xCi9_q7lEnOje1Uw=; '
        'osd=WlgdA0NoSdlTNSjUO2ABwO-yakspFXjkEX5vjX9bL4k2cEG0YW8-fzs9KNQ7aCvX7ttlTyA5BSJ6pbFLke3Q3UM=; '
        'tshl=; '
        'KLBRSID=4843ceb2c0de43091e0ff7c22eadca8c|1621522193|1621521413'
    )

    INSERT_SQL = (
        "INSERT INTO basic_wb_hot(hot_wb_name, hot_wb_url, hot_wb_type, hot_wb_degree, gmt_create) VALUES "
        "(%s, %s, %s, %s, %s)"
    )

    def __init__(self, host="115.238.146.153", user="zhihu",
                 password="123456", database="zhihu", charset="utf8"):
        """Open the database connection and create a cursor.

        All parameters are optional and default to the values this class has
        always used, so ``WeiboAnt()`` keeps connecting to the same database.

        Args:
            host: MySQL server address.
            user: MySQL user name.
            password: MySQL password.
            database: Name of the schema to use.
            charset: Connection character set.
        """
        # NOTE(review): the default credentials live in source control;
        # prefer passing them in from configuration.
        # Keyword arguments are used because recent PyMySQL releases no
        # longer accept positional arguments to connect().
        self.db = pymysql.connect(host=host,
                                  user=user,
                                  password=password,
                                  database=database,
                                  charset=charset)
        self.cursor = self.db.cursor()

    def close(self):
        """Close the cursor and the database connection."""
        self.cursor.close()
        self.db.close()

    @staticmethod
    def _first(values):
        """Return the first item of an XPath result list, or None if it is empty."""
        return values[0] if values else None

    def _parse_row(self, hot_msg):
        """Extract one hot-search entry from a table row element.

        Args:
            hot_msg: The ``<tr>`` element of one hot-search entry.

        Returns:
            A ``(hot_name, hot_link, hot_type, hot_degree)`` tuple, where
            ``hot_type`` and ``hot_degree`` are None when the row lacks them,
            or None when the row has no usable link or title.
        """
        first = self._first

        hot_name = first(hot_msg.xpath('./td[2]/a/text()'))
        # Prefer the ``href_to`` attribute; fall back to the plain ``href``
        # when the anchor does not carry it.
        href = first(hot_msg.xpath('./td[2]/a/@href_to'))
        if href is None:
            href = first(hot_msg.xpath('./td[2]/a/@href'))
        if href is None or hot_name is None:
            return None
        hot_link = self.LINK_PREFIX + href

        hot_degree = first(hot_msg.xpath('./td[2]/span/text()'))
        if hot_degree is None:
            # Message: "hot search has no heat value".
            print("热搜无热度")

        hot_type = first(hot_msg.xpath('./td[3]/i/text()'))
        if hot_type is None:
            # Message: "hot search has no type".
            print('热搜无类型')
        else:
            print(hot_type)

        return hot_name, hot_link, hot_type, hot_degree

    def _store(self, hot_name, hot_link, hot_type, hot_degree):
        """Insert one entry into ``basic_wb_hot``, rolling back on failure."""
        created = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
        try:
            self.cursor.execute(
                self.INSERT_SQL,
                [hot_name, hot_link, hot_type, hot_degree, created])
            self.db.commit()
            # Report success only once the commit has gone through.
            print(hot_name + '已入库')
        except Exception as e:
            # Deliberately broad: one bad row must not stop the remaining
            # rows from being stored. Roll back so the connection stays usable.
            print(e)
            self.db.rollback()

    def getHot(self):
        """Fetch the hot-search page and store its first ``MAX_ROWS`` entries.

        Each row's index is printed as it is processed. Rows without a link
        or a title are skipped; a failed insert is printed and rolled back
        without interrupting the remaining rows. Errors raised by the HTTP
        request itself (including a timeout) propagate to the caller.
        """
        headers = {
            'User-Agent': self.USER_AGENT,
            'cookie': self.COOKIE,
        }
        page_text = requests.get(url=self.HOT_URL,
                                 headers=headers,
                                 timeout=self.REQUEST_TIMEOUT).text

        tree = etree.HTML(page_text)
        if tree is None:
            # Nothing parseable came back. Message: "page could not be parsed".
            print('页面无法解析')
            return

        hot_lists = tree.xpath('//*[@id="pl_top_realtimehot"]/table/tbody/tr')
        for index, hot_msg in enumerate(hot_lists[:self.MAX_ROWS]):
            print(index)
            record = self._parse_row(hot_msg)
            if record is None:
                # Message: "row has no link or title, skipped".
                print('热搜无链接或标题，已跳过')
                continue
            self._store(*record)
if __name__ == '__main__':
    # Run one scrape when executed as a script. The database connection is
    # released even if the scrape raises, so the process does not leave it
    # open on failure.
    wbAut = WeiboAnt()
    try:
        wbAut.getHot()
    finally:
        wbAut.db.close()
